{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.8.5 64-bit ('mot': conda)",
   "metadata": {
    "interpreter": {
     "hash": "89da8cadc07d614e8ffb7449fd5b755cd3905d2c32ea8a31c8bd4ed7a73170e2"
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "## 矩阵欧式距离"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def euclidean_distances(x,y,squared=True):\n",
    "    \"\"\"\n",
    "    计算成对（平方）的欧几里得距离。\n",
    "    x: m × k; \n",
    "    y: n × k\n",
    "    \"\"\"\n",
    "    assert isinstance(x, np.ndarray) and x.ndim == 2  # 断言是否是数组且x维度=2\n",
    "    assert isinstance(y, np.ndarray) and y.ndim == 2  # 断言是否是数组且y维度=2\n",
    "    assert x.shape[1] == y.shape[1]  # 维度1,向量个数是否相同(同K)\n",
    "    # 求每行向量平方和,且保持维度。 dim=(m,1)\n",
    "    x_square = np.sum(x*x, axis=1, keepdims=True)  \n",
    "    if x is y:\n",
    "        y_square = x_square.T  # 当x=y时,y中向量的平方和等于x平方和的转置\n",
    "    else:\n",
    "        # 求每行向量平方和,且保持维度,并转置。 dim=(1,n)\n",
    "        y_square = np.sum(y*y,axis=1,keepdims=True).T  \n",
    "    distances = np.dot(x,y.T)  # x @ y^T\n",
    "    distances *= -2  # -2 × (x @ y^T)。 dim=(m,n)\n",
    "    distances += x_square  # 广播机制相加\n",
    "    distances += y_square\n",
    "    # 由于浮点舍入错误，结果可能小于0。\n",
    "    np.maximum(distances,0,distances)\n",
    "    if x is y:\n",
    "        # 确保向量与自身的距离为0\n",
    "        # This may not be the case due to floating point rounding errors.\n",
    "        distances.flat[::distances.shape[0] + 1] = 0.0\n",
    "    if not squared:\n",
    "        distances=np.sqrt(distances)  # 如果不是平方和距离则开根号\n",
    "    return distances\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "(3, 5) (4, 5)\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[11,12,4,6,8],[2,3,7,5,9],[14,8,9,11,10]])\n",
    "y = np.array([[12,10,8,9,5],[5,4,7,11,10],[13,15,14,7,9],[18,5,9,5,7]])\n",
    "print(x.shape,y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[ 39, 138, 115, 125],\n",
       "       [182,  47, 318, 268],\n",
       "       [ 38, 101,  92,  70]])"
      ]
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "source": [
    "euclidean_distances(x,y,True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[ 6.244998  , 11.74734012, 10.72380529, 11.18033989],\n",
       "       [13.49073756,  6.8556546 , 17.8325545 , 16.37070554],\n",
       "       [ 6.164414  , 10.04987562,  9.59166305,  8.36660027]])"
      ]
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "source": [
    "euclidean_distances(x,y,False)"
   ]
  },
  {
   "source": [
    "## 余弦相似度与余弦距离"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cosine_Similarity(a, b):\n",
    "    \"\"\"\n",
    "    计算余弦相似度。\n",
    "    a: array_like\n",
    "        N×M矩阵\n",
    "    b: array_like\n",
    "        L×M矩阵\n",
    "    \"\"\"\n",
    "    # 先转numpy后进行归一化(L2归一)\n",
    "    a = np.asarray(a) / np.linalg.norm(a,axis=1,keepdims=True)\n",
    "    b = np.asarray(b) / np.linalg.norm(b,axis=1,keepdims=True)\n",
    "\n",
    "    return np.dot(a, b.T)  # 余弦相似度\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[0.9258201 , 0.64465837],\n",
       "       [0.69631062, 1.        ]])"
      ]
     },
     "metadata": {},
     "execution_count": 24
    }
   ],
   "source": [
    "a = np.array([[2,1,1,0,1],[1,0,2,1,4]])\n",
    "b = np.array([[2,0,1,0,1],[1,0,2,1,4]])\n",
    "cosine_Similarity(a,b)  # 余弦相似度,越相似值越接近1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cosine_distance(a, b):\n",
    "    \"\"\"\n",
    "    计算余弦距离。\n",
    "    a: array_like\n",
    "        N×M矩阵\n",
    "    b: array_like\n",
    "        L×M矩阵\n",
    "    \"\"\"\n",
    "    return 1 - cosine_Similarity(a, b)  # 余弦距离,即(1-余弦相似度)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[1.27128439e-01, 7.41799002e-02, 3.55341629e-01],\n",
       "       [1.38359563e-01, 3.03689376e-01, 1.11022302e-16]])"
      ]
     },
     "metadata": {},
     "execution_count": 10
    }
   ],
   "source": [
    "cosine_distance(a, b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[0.127128, 0.07418 , 0.355342],\n",
       "       [0.13836 , 0.303689, 0.      ]])"
      ]
     },
     "metadata": {},
     "execution_count": 11
    }
   ],
   "source": [
    "np.around(cosine_distance(a, b),decimals=6)  # 保留6位来观察"
   ]
  },
  {
   "source": [
    "## torch中距离公式"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "`TripletMarginWithDistanceLoss(distance_function)`  \n",
    "Math:\n",
    "$$\n",
    "\\ell(a, p, n) = L = \\{l_1,\\dots,l_N\\}^\\top, \\quad\n",
    "        l_i = \\max \\{d(a_i, p_i) - d(a_i, n_i) + {\\rm margin}, 0\\} \n",
    "$$\n",
    "\n",
    "$$\n",
    "\\ell(x, y) =\n",
    "    \\begin{cases}\n",
    "        \\operatorname{mean}(L), &  \\text{if reduction} = \\text{`mean';}\\\\\n",
    "        \\operatorname{sum}(L),  &  \\text{if reduction} = \\text{`sum'.}\n",
    "    \\end{cases}\n",
    "$$"
   ],
   "cell_type": "markdown",
   "metadata": {}
  }
 ]
}